4 Laboratory sample processing
4.1 DNA extraction
4.1.1 General statistics
read_tsv("data/extraction.tsv") %>%
summarise(
max= max(extraction_total, na.rm = TRUE),
min= min(extraction_total, na.rm = TRUE),
mean= mean(extraction_total, na.rm = TRUE),
sd = sd(extraction_total, na.rm = TRUE)
) %>%
tt()| max | min | mean | sd |
|---|---|---|---|
| 7110 | 0 | 365.683 | 677.4012 |
4.1.2 Sample types
4.1.2.1 Data distribution
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
ggplot(., aes(x=extraction_total, fill=sample_type, color=sample_type)) +
geom_density() +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
scale_fill_manual(values = c("#bdca5050", "#AA337750")) +
labs(y="Density",x="DNA yield", fill="Sample type", color="Sample type") +
theme_classic()4.1.2.2 Comparison
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(extraction_total = ifelse(extraction_total == 0, 0.00001, extraction_total)) %>%
mutate(extraction_total = log(extraction_total)) %>%
select(extraction_total,sample_type) %>%
mutate(sample_type = factor(sample_type)) %>%
wilcox.test(extraction_total ~ sample_type,data=.)
Wilcoxon rank sum test with continuity correction
data: extraction_total by sample_type
W = 335473, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(sample_type) %>%
summarise(
mean= mean(extraction_total, na.rm = TRUE),
sd = sd(extraction_total, na.rm = TRUE)) %>%
tt()| sample_type | mean | sd |
|---|---|---|
| Anal/cloacal swab | 170.2285 | 362.7389 |
| Faecal | 316.6666 | 423.6434 |
4.1.3 Taxonomy
4.1.3.1 Data distribution
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(!is.na(tax_group)) %>%
ggplot(., aes(x=extraction_total, fill=tax_group, color=tax_group)) +
geom_density() +
scale_color_manual(values = c("#228833","#66CCEE","#CCBB44","#4477AA","#EE6677")) +
scale_fill_manual(values = c("#22883350","#66CCEE50","#CCBB4450","#4477AA50","#EE667750")) +
labs(y="Density",x="DNA yield", fill="Sample type", color="Sample type") +
theme_classic()left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
ggplot(., aes(y=extraction_total,x=tax_group,color=tax_group)) +
geom_jitter() +
scale_color_manual(values = c("#22883380","#66CCEE80","#CCBB4480","#4477AA80","#EE667780")) +
labs(y="DNA yield",x="Taxonomic group", color="Sample type") +
theme_classic()4.1.3.2 Comparison
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
mutate(extraction_total = ifelse(extraction_total == 0, 0.00001, extraction_total)) %>%
mutate(extraction_total = log(extraction_total)) %>%
select(extraction_total,tax_group) %>%
mutate(sample_type = factor(tax_group)) %>%
kruskal.test(extraction_total ~ tax_group,data=.)
Kruskal-Wallis rank sum test
data: extraction_total by tax_group
Kruskal-Wallis chi-squared = 946.6, df = 4, p-value < 2.2e-16
left_join(read_tsv("data/extraction.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
group_by(tax_group) %>%
summarise(
mean= mean(extraction_total, na.rm = TRUE),
sd = sd(extraction_total, na.rm = TRUE)) %>%
tt()| tax_group | mean | sd |
|---|---|---|
| Amphibians | 233.35078 | 358.90219 |
| Bats | 59.60574 | 92.00357 |
| Birds | 74.43550 | 262.34517 |
| Mammals | 463.93202 | 471.41487 |
| Reptiles | 394.50619 | 437.86229 |
4.2 Sequencing library preparation
4.2.1 Overall
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required > 0) %>%
summarise(
max= max(library_PCR_cycles_required, na.rm = TRUE),
min= min(library_PCR_cycles_required, na.rm = TRUE),
mean= mean(library_PCR_cycles_required, na.rm = TRUE),
sd = sd(library_PCR_cycles_required, na.rm = TRUE)) %>%
tt()| max | min | mean | sd |
|---|---|---|---|
| 33 | 2 | 9.150046 | 4.390537 |
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
lm(library_PCR_cycles_required ~ library_input_dna, data = .) %>%
summary()
Call:
lm(formula = library_PCR_cycles_required ~ library_input_dna,
data = .)
Residuals:
Min 1Q Median 3Q Max
-11.5911 -2.4314 -0.7679 1.8114 21.5487
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.5911019 0.1103983 104.99 <2e-16 ***
library_input_dna -0.0284251 0.0009006 -31.56 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.758 on 2443 degrees of freedom
(11 observations deleted due to missingness)
Multiple R-squared: 0.2897, Adjusted R-squared: 0.2894
F-statistic: 996.2 on 1 and 2443 DF, p-value: < 2.2e-16
4.2.2 Sample types
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_input_dna <= 200) %>%
ggplot(., aes(y=library_PCR_cycles_required, x=library_input_dna, color=sample_type, group=sample_type)) +
geom_point(alpha=0.5) +
stat_smooth(method = "lm", formula = y ~ x, geom = "smooth") +
scale_color_manual(values = c("#bdca50", "#AA3377")) +
theme_classic() +
labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type") +
theme_classic()4.2.2.1 Comparisons
Relationship between the amount of inputted DNA (ng) and the required number of cycles.
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_PCR_cycles_required>0 & library_input_dna>0) %>%
lme(library_PCR_cycles_required ~ library_input_dna, random=~1 | sample_type, data = .) %>%
broom.mixed::tidy() %>%
tt()| effect | group | term | estimate | std.error | df | statistic | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 11.87417166 | 1.1716203608 | 2330 | 10.13483 | 1.187767e-23 |
| fixed | NA | library_input_dna | -0.02501844 | 0.0008256453 | 2330 | -30.30168 | 2.507418e-170 |
| ran_pars | sample_type | sd_(Intercept) | 1.64945139 | NA | NA | NA | NA |
| ran_pars | Residual | sd_Observation | 3.30432885 | NA | NA | NA | NA |
Comparison between slopes.
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
lm(library_PCR_cycles_required ~ library_input_dna * sample_type, data = .) %>%
broom::tidy() %>%
tt()| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 13.3047246163 | 0.187031107 | 71.13642643 | 0.000000e+00 |
| library_input_dna | -0.0260524410 | 0.001812871 | -14.37082274 | 5.140847e-45 |
| sample_typeFaecal | -2.4762116244 | 0.227612093 | -10.87908638 | 5.960119e-27 |
| library_input_dna:sample_typeFaecal | 0.0001282468 | 0.002078874 | 0.06169052 | 9.508143e-01 |
4.2.3 Taxonomy
left_join(read_tsv("data/library.tsv"),
read_tsv("data/sample.tsv"),
by="sample_id") %>%
filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
filter(library_input_dna <= 200) %>%
ggplot(., aes(y=library_PCR_cycles_required, x=library_input_dna, color=tax_group, group=tax_group)) +
geom_point(alpha=0.5) +
stat_smooth(aes(fill = tax_group), method = "lm", formula = y ~ x, geom = "smooth") +
scale_color_manual(values = c("#22883380","#66CCEE80","#CCBB4480","#4477AA80","#EE667780")) +
scale_fill_manual(values = c("#22883350","#66CCEE50","#CCBB4450","#4477AA50","#EE667750")) +
theme_classic() +
labs(y="Required number of cycles",x="Amount of inputted DNA (ng)", color="Sample type", fill="Sample type") +
theme_classic()